PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_deepfryer_July2025.csv"
OUT=price_changes_sellertype
DBMS=csv REPLACE;
run;
PROC IMPORT 
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_deepfryer_July2025.csv"
out = SELLER_TYPE_TREND1
DBMS=csv REPLACE;
run;
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_deepfryer_July2025.csv" 
out=final_outputvector
DBMS=csv REPLACE;
run;


/***subsetting for the relevant analysis SKUs***/

data relevantSKUsonly;
set price_changes_sellertype;
where ASIN in ('B00NQ7QFGM', 'B00ANEKTTA', 'B005FYF7XQ','B01JCECNM2','B007OZXVDM') ; 
run;


data relevantSKUs;
set price_changes_sellertype;
where ASIN in ('B00NQ7QFGM', 'B00ANEKTTA', 'B005FYF7XQ','B01JCECNM2','B007OZXVDM') and seller_type in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4', 'Cluster 5'); 
run;

data relevantSKUs_used;
set price_changes_sellertype;
where ASIN in ('B00NQ7QFGM', 'B00ANEKTTA', 'B005FYF7XQ','B01JCECNM2','B007OZXVDM') and seller_type not in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4', 'Cluster 5'); 
run;


PROC SQL;
CREATE TABLE numsellers_newoffers AS
SELECT
TIME1,
ASIN,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUs
group by
time1,
ASIN;
quit;

PROC SQL;
CREATE TABLE numsellersbyeachtype_newoffers AS
SELECT
TIME1,
ASIN,
seller_type,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUsonly
group by
time1,
ASIN,
seller_type;
quit;

/**creating num sellers DV**/
proc sort data = numsellers_newoffers (keep=time1) nodupkey out= final_numsellers_outputvector;
by time1;
run;

%macro numsellers(b,c);
data numsellers&b. (keep = time1 NUM_SELLERS_&b.);
set numsellers_newoffers;
where ASIN = &c.;
rename NUM_SELLERS = NUM_SELLERS_&b. ;
run;

data final_numsellers_outputvector ;
merge final_numsellers_outputvector (in=a) numsellers&b. (in=b);
by time1;
if a ;
if a and not b then NUM_SELLERS_&b. = 0;
run;
%mend;
%numsellers(b = Tfal, c = 'B00NQ7QFGM'); 
%numsellers(b = Hbeach, c = 'B00ANEKTTA'); 
%numsellers(b = Presto, c = 'B005FYF7XQ'); 
%numsellers(b = CSN, c = 'B01JCECNM2'); 
%numsellers(b = Sec, c = 'B007OZXVDM'); 

PROC EXPORT 
DATA=final_numsellers_outputvector
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\numsellers_dv2outcomedeepfryer.csv"
REPLACE;
run;


/***covariate creation**/

proc sort data = SELLER_TYPE_TREND1 out = SELLER_TYPE_TREND11;
by time1;
run;
data SELLER_TYPE_TREND2;
set SELLER_TYPE_TREND11;
if seller_type in ('3P Old Focal', '3P Old Non-Focal') then seller_type = '3P Old';
else if seller_type in ('Amazon Old Focal', 'Amazon Old Non-Focal') then seller_type = 'Amazon Old';
else seller_type = seller_type;
run;

/**seller characteristics trend***/
proc freq data = SELLER_TYPE_TREND2;
tables seller_type;
run;

/*subset for 3p new sellers*/
data SELLER_TYPE_TREND3;
set SELLER_TYPE_TREND2;
where seller_type not in ('Amazon Ne', '3P Old', 'Amazon Ol');
run;
proc contents data = SELLER_TYPE_TREND3;
run;

PROC SQL;
CREATE TABLE SELLER_charcs_byasin AS
SELECT
TIME1,
ASIN,
SELLER_TYPE,
mean(FBA) as mean_FBA,
/*std(FBA) as sd_FBA,*/
mean(Freeship) as mean_freeship,
/*std(Freeship) as sd_freeship,*/
mean(Prime) as mean_prime,
/*std(Prime) as sd_prime,*/
sum(condition) as new_offers,
mean(num_seller_Posrating) as mean_posrating,
/*std(num_seller_Posrating) as sd_posrating,*/
mean(num_seller_ratings) as mean_numratings,
/*std(num_seller_ratings) as sd_numratings,*/
mean(Seller_Rating) as mean_starrate
/*std(Seller_Rating) as sd_starrate*/
from 
SELLER_TYPE_TREND3
group by
time1,
ASIN,
seller_type;
quit;



/**converting the data by time, one row per date**/

proc contents data = SELLER_charcs_byasin;
run;

proc sort data = SELLER_charcs_byasin (keep=time1) nodupkey out= SELLER_charcs_byasin2;
by time1;
run;


%macro sellerchar(b,c);
data SELLER_charcs_byasin&b. (keep = time1 mean_FBA&b. 
mean_freeship&b. mean_numratings&b.   
mean_posrating&b. 
mean_prime&b.   
mean_starrate&b.);
set SELLER_charcs_byasin;
where ASIN = &c. ;
rename mean_FBA = mean_FBA&b.;
rename mean_freeship = mean_freeship&b.;  
rename mean_numratings = mean_numratings&b.;    
rename mean_posrating =  mean_posrating&b.;    
rename mean_prime =  mean_prime&b.;    
rename mean_starrate = mean_starrate&b.;
run;

data SELLER_charcs_byasin2 ;
merge SELLER_charcs_byasin2 (in=a) SELLER_charcs_byasin&b. (in=b);
by time1;
if a ;
if a and not b then do;
mean_FBA&b. = 0;
mean_freeship&b. = 0;
mean_posrating&b. = 0;
mean_prime&b. =0;
mean_starrate&b. = 0;
end;
run;
%mend;
%sellerchar(b = Tfal_3p, c = 'B00NQ7QFGM'); 
%sellerchar(b = Hbeach_3p, c = 'B00ANEKTTA');  
%sellerchar(b = PR_3p, c = 'B005FYF7XQ'); 
%sellerchar(b = CSN_3p, c = 'B01JCECNM2'); 
%sellerchar(b = SEC_3p, c = 'B007OZXVDM'); 

/**coding all missings with 0 values**/

proc contents data = SELLER_charcs_byasin2;
run;

data seller_characs_byasin3;
set SELLER_charcs_byasin2;
if	mean_FBACSN_3p	=.	then 	mean_FBACSN_3p	=0;
if	mean_FBAHbeach_3p	=.	then 	mean_FBAHbeach_3p	=0;
if	mean_FBAPR_3p	=.	then 	mean_FBAPR_3p	=0;
if	mean_FBASEC_3p	=.	then 	mean_FBASEC_3p	=0;
if	mean_FBATfal_3p	=.	then 	mean_FBATfal_3p	=0;
if	mean_freeshipCSN_3p	=.	then 	mean_freeshipCSN_3p	=0;
if	mean_freeshipHbeach_3p	=.	then 	mean_freeshipHbeach_3p	=0;
if	mean_freeshipPR_3p	=.	then 	mean_freeshipPR_3p	=0;
if	mean_freeshipSEC_3p	=.	then 	mean_freeshipSEC_3p	=0;
if	mean_freeshipTfal_3p	=.	then 	mean_freeshipTfal_3p	=0;
if	mean_numratingsCSN_3p	=.	then 	mean_numratingsCSN_3p	=0;
if	mean_numratingsHbeach_3p	=.	then 	mean_numratingsHbeach_3p	=0;
if	mean_numratingsPR_3p	=.	then 	mean_numratingsPR_3p	=0;
if	mean_numratingsSEC_3p	=.	then 	mean_numratingsSEC_3p	=0;
if	mean_numratingsTfal_3p	=.	then 	mean_numratingsTfal_3p	=0;
if	mean_posratingCSN_3p	=.	then 	mean_posratingCSN_3p	=0;
if	mean_posratingHbeach_3p	=.	then 	mean_posratingHbeach_3p	=0;
if	mean_posratingPR_3p	=.	then 	mean_posratingPR_3p	=0;
if	mean_posratingSEC_3p	=.	then 	mean_posratingSEC_3p	=0;
if	mean_posratingTfal_3p	=.	then 	mean_posratingTfal_3p	=0;
if	mean_primeCSN_3p	=.	then 	mean_primeCSN_3p	=0;
if	mean_primeHbeach_3p	=.	then 	mean_primeHbeach_3p	=0;
if	mean_primePR_3p	=.	then 	mean_primePR_3p	=0;
if	mean_primeSEC_3p	=.	then 	mean_primeSEC_3p	=0;
if	mean_primeTfal_3p	=.	then 	mean_primeTfal_3p	=0;
if	mean_starrateCSN_3p	=.	then 	mean_starrateCSN_3p	=0;
if	mean_starrateHbeach_3p	=.	then 	mean_starrateHbeach_3p	=0;
if	mean_starratePR_3p	=.	then 	mean_starratePR_3p	=0;
if	mean_starrateSEC_3p	=.	then 	mean_starrateSEC_3p	=0;
if	mean_starrateTfal_3p	=.	then 	mean_starrateTfal_3p	=0;

run;


proc sort data = seller_characs_byasin3 nodupkey out=seller_characs_byasin4 ;
by  TIME1 ;
run;


proc expand data=seller_characs_byasin4 out=seller_characs_byasin5 method = none; 
convert	mean_FBACSN_3p	=	mean_FBACSN_3p_lag1	/transformout = (lag 1);
convert	mean_FBAHbeach_3p	=	mean_FBAHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_FBAPR_3p	=	mean_FBAPR_3p_lag1	/transformout = (lag 1);
convert	mean_FBASEC_3p	=	mean_FBASEC_3p_lag1	/transformout = (lag 1);
convert	mean_FBATfal_3p	=	mean_FBATfal_3p_lag1	/transformout = (lag 1);
convert	mean_freeshipCSN_3p	=	mean_freeshipCSN_3p_lag1	/transformout = (lag 1);
convert	mean_freeshipHbeach_3p	=	mean_freeshipHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_freeshipPR_3p	=	mean_freeshipPR_3p_lag1	/transformout = (lag 1);
convert	mean_freeshipSEC_3p	=	mean_freeshipSEC_3p_lag1	/transformout = (lag 1);
convert	mean_freeshipTfal_3p	=	mean_freeshipTfal_3p_lag1	/transformout = (lag 1);
convert	mean_numratingsCSN_3p	=	mean_numratingsCSN_3p_lag1	/transformout = (lag 1);
convert	mean_numratingsHbeach_3p	=	mean_numratingsHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_numratingsPR_3p	=	mean_numratingsPR_3p_lag1	/transformout = (lag 1);
convert	mean_numratingsSEC_3p	=	mean_numratingsSEC_3p_lag1	/transformout = (lag 1);
convert	mean_numratingsTfal_3p	=	mean_numratingsTfal_3p_lag1	/transformout = (lag 1);
convert	mean_posratingCSN_3p	=	mean_posratingCSN_3p_lag1	/transformout = (lag 1);
convert	mean_posratingHbeach_3p	=	mean_posratingHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_posratingPR_3p	=	mean_posratingPR_3p_lag1	/transformout = (lag 1);
convert	mean_posratingSEC_3p	=	mean_posratingSEC_3p_lag1	/transformout = (lag 1);
convert	mean_posratingTfal_3p	=	mean_posratingTfal_3p_lag1	/transformout = (lag 1);
convert	mean_primeCSN_3p	=	mean_primeCSN_3p_lag1	/transformout = (lag 1);
convert	mean_primeHbeach_3p	=	mean_primeHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_primePR_3p	=	mean_primePR_3p_lag1	/transformout = (lag 1);
convert	mean_primeSEC_3p	=	mean_primeSEC_3p_lag1	/transformout = (lag 1);
convert	mean_primeTfal_3p	=	mean_primeTfal_3p_lag1	/transformout = (lag 1);
convert	mean_starrateCSN_3p	=	mean_starrateCSN_3p_lag1	/transformout = (lag 1);
convert	mean_starrateHbeach_3p	=	mean_starrateHbeach_3p_lag1	/transformout = (lag 1);
convert	mean_starratePR_3p	=	mean_starratePR_3p_lag1	/transformout = (lag 1);
convert	mean_starrateSEC_3p	=	mean_starrateSEC_3p_lag1	/transformout = (lag 1);
convert	mean_starrateTfal_3p	=	mean_starrateTfal_3p_lag1	/transformout = (lag 1);

run;


/*****REMAINING COVARIATES ARE FROM THE ORIGINAL DV - FINAL DATA SET*****/

/*PROC IMPORT*/
/*DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Deep Fryers\final_gam_modeldata.csv" */
/*out=final_GAMMODELDATA*/
/*DBMS=csv REPLACE;*/
/*run;*/

PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Deep Fryers\final_gam_modeldata_jmrnd4.csv" 
out=final_gam_modeldata
DBMS=csv REPLACE;
run;


proc sort data = final_numsellers_outputvector out = sorted_finalnumsellersoutput;
by time1;
run;

proc sort data = final_gam_modeldata out = sorted_final_GAMMODELDATA;
by time1;
run;


proc sort data = seller_characs_byasin5 out = sorted_seller_characs_byasin5;
by time1;
run;


data finalnumsellers_modeldata;
merge sorted_finalnumsellersoutput (in=a) sorted_final_GAMMODELDATA (in=b) sorted_seller_characs_byasin5 (in=c);
by time1;
if a ;
run;



/**Adding special Amazon holidays***/

/*Valentine's day sale: Feb 5th - Feb 14th 

Easter day sale: March 19-March 26 (2018)

Mothers day: May 13 (2018)

Amazon prime day: JUly 16 (2018); post-prime day sale July 18 (2018)

Black Friday:
    Nov 17th - Nov 24th (2017)
    Nov 16th - Nov 23rd (2018)

Cyber Monday:
	Nov 27th (2017), Nov 26th (2018)

Amazon digital day: Dec 29 (2017), Dec 28 (2018)

12 days of deal sale: Dec 3rd - Dec 14 (2017), Dec 2nd - Dec 13 (2018)

Christmas day sale: Dec 20-Dec25

Hanukkah sale: Dec 1- Dec 7

Post Christmas sale: Dec 26- Jan 1st*/

data finalnumsellers_modeldata1;
set finalnumsellers_modeldata;
if time1 in ( '01Jan2018'd , 
              '05Feb2018'd, '06Feb2018'd,'07Feb2018'd,'08Feb2018'd,'09Feb2018'd,'10Feb2018'd,'11Feb2018'd,
              '12Feb2018'd, '13Feb2018'd,'14Feb2018'd,
			  '19Mar2018'd, '20Mar2018'd,'21Mar2018'd,'22Mar2018'd,'23Mar2018'd,'24Mar2018'd,'25Mar2018'd,'26Mar2018'd,
			  '13May2018'd, '16Jul2018'd, '18Jul2018'd, 
              '17Nov2017'd,'18Nov2017'd,'19Nov2017'd,'20Nov2017'd,'21Nov2017'd,'22Nov2017'd, '23Nov2017'd,'24Nov2017'd,'27Nov2017'd,
			  '16Nov2018'd,'17Nov2018'd,'18Nov2018'd,'19Nov2018'd,'20Nov2018'd,'21Nov2018'd,'22Nov2018'd, '23Nov2018'd,'26Nov2018'd,
              '03Dec2017'd,'04Dec2017'd,'05Dec2017'd,'06Dec2017'd,'07Dec2017'd,'08Dec2017'd,
              '09Dec2017'd,'10Dec2017'd,'11Dec2017'd,'12Dec2017'd,'13Dec2017'd,'14Dec2017'd,
			  '02Dec2018'd,'03Dec2018'd,'04Dec2018'd,'05Dec2018'd,'06Dec2018'd,'07Dec2018'd,'08Dec2018'd,
              '09Dec2018'd,'10Dec2018'd,'11Dec2018'd,'12Dec2018'd,'13Dec2018'd) then seasonal_sale = 1;
else if time1 >= '20Dec2017'd and time1 <= '31Dec2017'd then seasonal_sale = 1;
else if time1 >= '20Dec2018'd and time1 <= '31Dec2018'd then seasonal_sale = 1;
else seasonal_sale = 0;
run;

proc freq data = finalnumsellers_modeldata1;
tables season*seasonal_sale /list;
run;



/*PROC EXPORT */
/*DATA=finalnumsellers_modeldata1*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Deep Fryers\finalnumsellers_modeldata1.csv"*/
/*REPLACE;*/
/*run;*/

PROC EXPORT 
DATA=finalnumsellers_modeldata1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Deep Fryers\finalnumsellers_modeldata_Rnd4.csv"
REPLACE;
run;

